library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-6

Functional tables

Building the distance matrices

#COG
icm_cog_metaGsize <- read.table(file = "/home/srllana/R/Tables/EMOSE-GC_ICM_250bp_COG.lengthNorm.metaGsizeNorm.counts.tbl",
                                header = TRUE, row.names = 1)
icm_cog_metaGsize <- t(icm_cog_metaGsize)
icm_cog_metaGsize_bray <- vegdist(icm_cog_metaGsize, method = "bray")

icm_cog_scg <- read.table(file = "/home/srllana/R/Tables/EMOSE-GC_ICM_250bp_COG.lengthNorm.SCGnorm.counts.tbl", 
                          header = TRUE, row.names = 1)
icm_cog_scg <- t(icm_cog_scg)
icm_cog_scg_bray <- vegdist(icm_cog_scg, method = "bray")

#KEGG
icm_kegg_metaGsize <- read.table(file = "/home/srllana/R/Tables/EMOSE-GC_ICM_250bp_KEGG.ko.lengthNorm.metaGsizeNorm.counts.tbl", header = TRUE, row.names = 1)
icm_kegg_metaGsize <- t(icm_kegg_metaGsize)
icm_kegg_metaGsize_bray <- vegdist(icm_kegg_metaGsize, method = "bray")

icm_kegg_scg <- read.table(file = "/home/srllana/R/Tables/EMOSE-GC_ICM_250bp_KEGG.ko.lengthNorm.SCGnorm.counts.tbl", 
                          header = TRUE, row.names = 1)
icm_kegg_scg <- t(icm_kegg_scg)
icm_kegg_scg_bray <- vegdist(icm_kegg_scg, method = "bray")

#PFAM
icm_pfam_metaGsize <- read.table(file = "/home/srllana/R/Tables/EMOSE-GC_ICM_250bp_pfam.lengthNorm.metaGsizeNorm.counts.tbl",
                                header = TRUE, row.names = 1)
icm_pfam_metaGsize <- t(icm_pfam_metaGsize)
icm_pfam_metaGsize_bray <- vegdist(icm_pfam_metaGsize, method = "bray")

icm_pfam_scg <- read.table(file = "/home/srllana/R/Tables/EMOSE-GC_ICM_250bp_pfam.lengthNorm.SCGnorm.counts.tbl", 
                          header = TRUE, row.names = 1)
icm_pfam_scg <- t(icm_pfam_scg)
icm_pfam_scg_bray <- vegdist(icm_pfam_scg, method = "bray")

Dendograms

#COG
icm_cog_metaGsize_hclust <- hclust(icm_cog_metaGsize_bray, method = "average")
plot(icm_cog_metaGsize_hclust, main = "COG metaGsize")

icm_cog_scg_hclust <- hclust(icm_cog_scg_bray, method = "average")
plot(icm_cog_scg_hclust, main = "COG SCG")

#KEGG
icm_kegg_metaGsize_hclust <- hclust(icm_kegg_metaGsize_bray, method = "average")
plot(icm_kegg_metaGsize_hclust, main = "KEGG metaGsize")

icm_kegg_scg_hclust <- hclust(icm_kegg_scg_bray, method = "average")
plot(icm_kegg_scg_hclust, main = "KEGG SCG")

#PFAM
icm_pfam_metaGsize_hclust <- hclust(icm_pfam_metaGsize_bray, method = "average")
plot(icm_pfam_metaGsize_hclust, main = "PFAM metaGsize")

icm_pfam_scg_hclust <- hclust(icm_pfam_scg_bray, method = "average")
plot(icm_pfam_scg_hclust, main = "PFAM SCG")

Observations: The dendograms corresponding to the same type of normalization are very similar, it also seems to be between the two types.

Heatmaps

#COG
heatmap(as.matrix(icm_cog_metaGsize_bray), Rowv = as.dendrogram(icm_cog_metaGsize_hclust), Colv = "Rowv", symm = TRUE,
        main = "COG metaGsize")

heatmap(as.matrix(icm_cog_scg_bray), Rowv = as.dendrogram(icm_cog_scg_hclust), Colv = "Rowv", symm = TRUE,
        main = "COG SCG")

#KEGG
heatmap(as.matrix(icm_kegg_metaGsize_bray), Rowv = as.dendrogram(icm_kegg_metaGsize_hclust), Colv = "Rowv", symm = TRUE,
        main = "KEGG metaGsize")

heatmap(as.matrix(icm_kegg_scg_bray), Rowv = as.dendrogram(icm_kegg_scg_hclust), Colv = "Rowv", symm = TRUE,
        main = "KEGG SCG")

#PFAM
heatmap(as.matrix(icm_pfam_metaGsize_bray), Rowv = as.dendrogram(icm_pfam_metaGsize_hclust), Colv = "Rowv", symm = TRUE,
        main = "PFAM metaGsize")

heatmap(as.matrix(icm_pfam_scg_bray), Rowv = as.dendrogram(icm_pfam_scg_hclust), Colv = "Rowv", symm = TRUE,
        main = "PFAM SCG")

NMDS

#COG
icm_cog_metaGsize_nmds <- monoMDS(icm_cog_metaGsize_bray)
icm_cog_metaGsize_nmds
## 
## Call:
## monoMDS(dist = icm_cog_metaGsize_bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'bray', call 'vegdist(x = icm_cog_metaGsize, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.00955107 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 200 iterations: Maximum number of iterations (maxit) reached
plot(icm_cog_metaGsize_nmds, main = "COG metaGsize")

icm_cog_scg_nmds <- monoMDS(icm_cog_scg_bray)
icm_cog_scg_nmds
## 
## Call:
## monoMDS(dist = icm_cog_scg_bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'bray', call 'vegdist(x = icm_cog_scg, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.004516829 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 200 iterations: Maximum number of iterations (maxit) reached
plot(icm_cog_scg_nmds, main = "COG SCG")

#KEGG
icm_kegg_metaGsize_nmds <- monoMDS(icm_kegg_metaGsize_bray)
icm_kegg_metaGsize_nmds
## 
## Call:
## monoMDS(dist = icm_kegg_metaGsize_bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'bray', call 'vegdist(x = icm_kegg_metaGsize, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.01112545 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 200 iterations: Maximum number of iterations (maxit) reached
plot(icm_kegg_metaGsize_nmds, main = "KEGG metaGsize")

icm_kegg_scg_nmds <- monoMDS(icm_kegg_scg_bray)
icm_kegg_scg_nmds
## 
## Call:
## monoMDS(dist = icm_kegg_scg_bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'bray', call 'vegdist(x = icm_kegg_scg, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.003476752 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 200 iterations: Maximum number of iterations (maxit) reached
plot(icm_kegg_scg_nmds, main = "KEGG SCG")

#PFAM
icm_pfam_metaGsize_nmds <- monoMDS(icm_pfam_metaGsize_bray)
icm_pfam_metaGsize_nmds
## 
## Call:
## monoMDS(dist = icm_pfam_metaGsize_bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'bray', call 'vegdist(x = icm_pfam_metaGsize, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.007878882 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 200 iterations: Maximum number of iterations (maxit) reached
plot(icm_pfam_metaGsize_nmds, main = "PFAM metaGsize")

icm_pfam_scg_nmds <- monoMDS(icm_pfam_scg_bray)
icm_pfam_scg_nmds
## 
## Call:
## monoMDS(dist = icm_pfam_scg_bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'bray', call 'vegdist(x = icm_pfam_scg, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.005501107 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 200 iterations: Maximum number of iterations (maxit) reached
plot(icm_pfam_scg_nmds, main = "PFAM SCG")

Observations: Low stress, dendogram's clusters are identified in the plots.

Mantel tests

Mantel tests for the correlation of functional matrices. The goal is to test the correalation between and within the metaGsize and the SCG normalizations from COG, KEGG and PFAM tables.

#metaGsize vs SCG
plot(icm_cog_metaGsize_bray, icm_cog_scg_bray, main = "COG: metaGsize vs SCG")
abline(0,1)

mantel(icm_cog_metaGsize_bray, icm_cog_scg_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_cog_metaGsize_bray, ydis = icm_cog_scg_bray) 
## 
## Mantel statistic r: 0.956 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0441 0.0660 0.0885 0.1136 
## Permutation: free
## Number of permutations: 999
plot(icm_kegg_metaGsize_bray, icm_kegg_scg_bray, main = "KEGG: metaGsize vs SCG")
abline(0,1)

mantel(icm_kegg_metaGsize_bray, icm_kegg_scg_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_kegg_metaGsize_bray, ydis = icm_kegg_scg_bray) 
## 
## Mantel statistic r: 0.957 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0530 0.0763 0.1042 0.1325 
## Permutation: free
## Number of permutations: 999
plot(icm_pfam_metaGsize_bray, icm_pfam_scg_bray, main = "PFAM: metaGsize vs SCG")
abline(0,1)

mantel(icm_pfam_metaGsize_bray, icm_pfam_scg_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_pfam_metaGsize_bray, ydis = icm_pfam_scg_bray) 
## 
## Mantel statistic r: 0.9565 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0496 0.0757 0.0990 0.1195 
## Permutation: free
## Number of permutations: 999
#metaGsize
plot(icm_cog_metaGsize_bray, icm_kegg_metaGsize_bray, main = "COG metaGsize vs KEGG metaGsize")
abline(0,1)

mantel(icm_cog_metaGsize_bray, icm_kegg_metaGsize_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_cog_metaGsize_bray, ydis = icm_kegg_metaGsize_bray) 
## 
## Mantel statistic r: 0.9965 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0468 0.0648 0.0856 0.1056 
## Permutation: free
## Number of permutations: 999
plot(icm_cog_metaGsize_bray, icm_pfam_metaGsize_bray, main = "COG metaGsize vs PFAM metaGsize")
abline(0,1)

mantel(icm_cog_metaGsize_bray, icm_pfam_metaGsize_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_cog_metaGsize_bray, ydis = icm_pfam_metaGsize_bray) 
## 
## Mantel statistic r: 0.9995 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0408 0.0632 0.0805 0.0957 
## Permutation: free
## Number of permutations: 999
plot(icm_pfam_metaGsize_bray, icm_kegg_metaGsize_bray, main = "PFAM metaGsize vs KEGG metaGsize")
abline(0,1)

mantel(icm_pfam_metaGsize_bray, icm_kegg_metaGsize_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_pfam_metaGsize_bray, ydis = icm_kegg_metaGsize_bray) 
## 
## Mantel statistic r: 0.9962 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0474 0.0664 0.0821 0.1166 
## Permutation: free
## Number of permutations: 999
#SCG
plot(icm_cog_scg_bray, icm_kegg_scg_bray, main = "COG SCG vs KEGG SCG")
abline(0,1)

mantel(icm_cog_scg_bray, icm_kegg_scg_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_cog_scg_bray, ydis = icm_kegg_scg_bray) 
## 
## Mantel statistic r: 0.9999 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0549 0.0806 0.1034 0.1207 
## Permutation: free
## Number of permutations: 999
plot(icm_cog_scg_bray, icm_pfam_scg_bray, main = "COG SCG vs PFAM SCG")
abline(0,1)

mantel(icm_cog_scg_bray, icm_pfam_scg_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_cog_scg_bray, ydis = icm_pfam_scg_bray) 
## 
## Mantel statistic r: 0.9992 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0551 0.0780 0.1006 0.1263 
## Permutation: free
## Number of permutations: 999
plot(icm_pfam_scg_bray, icm_kegg_scg_bray, main = "PFAM SCG vs KEGG SCG")
abline(0,1)

mantel(icm_pfam_scg_bray, icm_kegg_scg_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_pfam_scg_bray, ydis = icm_kegg_scg_bray) 
## 
## Mantel statistic r: 0.9987 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0550 0.0832 0.1097 0.1364 
## Permutation: free
## Number of permutations: 999

Observations: Very good correlation in all the tests.


Gene tables

Building the distance matrices

icm_gene_scg <- read.table(file = "/home/srllana/R/Tables/EMOSE-GC_ICM_250bp_gene.length.SCG.Norm.counts.tbl", header = TRUE, row.names = 1)
icm_gene_scg <- t(icm_gene_scg)
icm_gene_scg_bray <- vegdist(icm_gene_scg, method = "bray")
icm_gene_metaGsize <- read.table(file = "/home/srllana/R/Tables/EMOSE-GC_ICM_250bp_gene.lengthNorm.metaGsizeGbNorm.counts.tbl", header = TRUE, row.names = 1)
icm_gene_metaGsize <- t(icm_gene_metaGsize)
icm_gene_metaGsize_bray <- vegdist(icm_gene_metaGsize, method = "bray")
icm_gene_tpm <- read.table(file = "/home/srllana/R/Tables/EMOSE-GC_ICM_250bp_gene.TPM.tbl", header = TRUE, row.names = 1)
icm_gene_tpm <- t(icm_gene_tpm)
icm_gene_tpm_bray <- vegdist(icm_gene_tpm, method = "bray")

Dendograms

icm_gene_scg_hclust <- hclust(icm_gene_scg_bray, method = "average")
plot(icm_gene_scg_hclust, main = "Gene SCG")

icm_gene_metaGsize_hclust <- hclust(icm_gene_metaGsize_bray, method = "average")
plot(icm_gene_metaGsize_hclust, main = "Gene metaGsize")

icm_gene_tpm_hclust <- hclust(icm_gene_tpm_bray, method = "average")
plot(icm_gene_tpm_hclust, main = "Gene TPM")

Observations: metaGsize and TPM dendograms are identical and they are similar to the SCG one.

Heatmap

heatmap(as.matrix(icm_gene_scg_bray), Rowv = as.dendrogram(icm_gene_scg_hclust), Colv = "Rowv", symm = TRUE, main = "Gene SCG")

heatmap(as.matrix(icm_gene_metaGsize_bray), Rowv = as.dendrogram(icm_gene_metaGsize_hclust), Colv = "Rowv", symm = TRUE, main = "Gene metaGsize")

heatmap(as.matrix(icm_gene_tpm_bray), Rowv = as.dendrogram(icm_gene_tpm_hclust), Colv = "Rowv", symm = TRUE, main = "Gene TPM")

Observations: Almost identical heatmaps for metaGsize and TPM normalizations.

NMDS

icm_gene_scg_nmds <- monoMDS(icm_gene_scg_bray)
icm_gene_scg_nmds
## 
## Call:
## monoMDS(dist = icm_gene_scg_bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'bray', call 'vegdist(x = icm_gene_scg, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.05069149 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 67 iterations: Stress nearly unchanged (ratio > sratmax)
plot(icm_gene_scg_nmds, main = "Gene SCG NMDS")

icm_gene_metaGsize_nmds <- monoMDS(icm_gene_metaGsize_bray)
icm_gene_metaGsize_nmds
## 
## Call:
## monoMDS(dist = icm_gene_metaGsize_bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'bray', call 'vegdist(x = icm_gene_metaGsize, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.05926582 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 83 iterations: Stress nearly unchanged (ratio > sratmax)
plot(icm_gene_metaGsize_nmds, main = "Gene metaGsize NMDS")

icm_gene_tpm_nmds <- monoMDS(icm_gene_tpm_bray)
icm_gene_tpm_nmds
## 
## Call:
## monoMDS(dist = icm_gene_tpm_bray) 
## 
## Non-metric Multidimensional Scaling
## 
## 50 points, dissimilarity 'bray', call 'vegdist(x = icm_gene_tpm, method = "bray")'
## 
## Dimensions: 2 
## Stress:     0.1110667 
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 84 iterations: Stress nearly unchanged (ratio > sratmax)
plot(icm_gene_tpm_nmds, main = "Gene TPM NMDS")

Mantel tests for gene distance matrices

plot(icm_gene_scg_bray, icm_gene_metaGsize_bray, main = "Gene SCG vs metaGsize")
abline(0,1)

mantel(icm_gene_scg_bray, icm_gene_metaGsize_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_gene_scg_bray, ydis = icm_gene_metaGsize_bray) 
## 
## Mantel statistic r: 0.9885 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0517 0.0743 0.0956 0.1201 
## Permutation: free
## Number of permutations: 999
plot(icm_gene_scg_bray, icm_gene_tpm_bray, main = "Gene SCG vs TPM")
abline(0,1)

mantel(icm_gene_scg_bray, icm_gene_tpm_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_gene_scg_bray, ydis = icm_gene_tpm_bray) 
## 
## Mantel statistic r: 0.9884 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0579 0.0800 0.1084 0.1510 
## Permutation: free
## Number of permutations: 999
plot(icm_gene_metaGsize_bray, icm_gene_tpm_bray, main = "Gene metaGsize vs TPM")
abline(0,1)

mantel(icm_gene_metaGsize_bray, icm_gene_tpm_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_gene_metaGsize_bray, ydis = icm_gene_tpm_bray) 
## 
## Mantel statistic r:     1 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0524 0.0752 0.0967 0.1168 
## Permutation: free
## Number of permutations: 999

Observations: Very high correlation between all the gene matrices.


Mantel tests for funtional and gene distance matrices

Mantel tests for some of the matrices. No need to check all the cases due to the fact that it has been observed that functional matices are highly correlated as well as the gene matrices.

plot(icm_cog_metaGsize_bray, icm_gene_metaGsize_bray, main = "COG metaGsize vs Gene metaGsize")
abline(0,1)

mantel(icm_cog_metaGsize_bray, icm_gene_metaGsize_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_cog_metaGsize_bray, ydis = icm_gene_metaGsize_bray) 
## 
## Mantel statistic r: 0.8984 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0451 0.0667 0.0830 0.1118 
## Permutation: free
## Number of permutations: 999
plot(icm_kegg_scg_bray, icm_gene_scg_bray, main = "KEGG SCG vs Gene SCG")
abline(0,1)

mantel(icm_kegg_scg_bray, icm_gene_scg_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_kegg_scg_bray, ydis = icm_gene_scg_bray) 
## 
## Mantel statistic r: 0.931 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0531 0.0675 0.0964 0.1302 
## Permutation: free
## Number of permutations: 999
plot(icm_pfam_metaGsize_bray, icm_gene_tpm_bray, main = "PFAM metaGsize vs Gene TPM")
abline(0,1)

mantel(icm_pfam_metaGsize_bray, icm_gene_tpm_bray)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = icm_pfam_metaGsize_bray, ydis = icm_gene_tpm_bray) 
## 
## Mantel statistic r: 0.9065 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0528 0.0757 0.0993 0.1189 
## Permutation: free
## Number of permutations: 999

Observations: High correlation between the functional and gene distance matrices.